/**************************************************/
/* Brand Davis Replication                        */
/* Kristin and Tracey                             */
/* Last modified 6 May 2012                       */
/* Previous file: Perkins Shollenberger Upload.R  */
/**************************************************/


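********
**Use Jennie Brand's HTE program (hte).
**The code below also calls the user-written pscore command, which must be installed separately.
********
*ssc install hte
*findit pscore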


****Attendance
* Treatment: coll19enr.  Outcome: children41.
* Step 1 estimates a propensity score with pscore; step 2 runs hte on the
* same covariate list and uses the pscore output (propsc) as a control.

use "/Users/Kristin/Dropbox/gov2001 replication/from Brand/newanalysis19_n2013.dta", clear

* Covariate list, assembled in pieces so that no single line is overly long
local i_psc_vars "black hisp i_bmmaed i_bmfaed maedfaed i_parinc i_intact"
local i_psc_vars "`i_psc_vars' i_sibsz USborn i_farm i_south i_catholic i_jewish"
local i_psc_vars "`i_psc_vars' i_abil i_hsprog i_parencq i_frndplq i_mom18 good"

* Step 1: propensity score (stored in propsc) and block identifier (stored in block)
pscore coll19enr `i_psc_vars' [pweight=weight], ///
	pscore(propsc) ///
	blockid(block) ///
	numblo(7)
* Distribution of the estimated score within each block
by block, sort: summarize propsc

* Step 2: hte.  The covariate list is defined again immediately before the
* call, so the call never receives an empty list.
local i_psc_vars "black hisp i_bmmaed i_bmfaed maedfaed i_parinc i_intact"
local i_psc_vars "`i_psc_vars' i_sibsz USborn i_farm i_south i_catholic i_jewish"
local i_psc_vars "`i_psc_vars' i_abil i_hsprog i_parencq i_frndplq i_mom18 good"
hte children41 coll19enr `i_psc_vars' [pweight=weight], ///
	alpha(0.001) ///
	join(6 7) ///
	noisily ///
	estcom(poisson) estopts(irr) ///
	controls(1: hisp 6: propsc) ///
	nograph

* Drop everything from memory before the next specification
clear all

****Completion
* Treatment: coll23comp.  Outcome: children41.
* Same two-step procedure as for attendance, on the age-23 analysis file and
* with its own covariate list.

use "/Users/Kristin/Dropbox/gov2001 replication/from Brand/newanalysis23_n2013.dta", clear

* Covariate list, assembled in pieces so that no single line is overly long
local i_psc_vars "black hisp i_bmmaed i_bmfaed i_parinc i_intact i_sibsz"
local i_psc_vars "`i_psc_vars' i_farm i_south i_catholic i_jewish i_abil i_hsprog"
local i_psc_vars "`i_psc_vars' i_parencq i_frndplq i_mom18 i_mom22 good"

* Step 1: propensity score (stored in propsc) and block identifier (stored in block)
pscore coll23comp `i_psc_vars' [pweight=weight], ///
	pscore(propsc) ///
	blockid(block) ///
	numblo(7)

* Step 2: hte.  The covariate list is defined again immediately before the
* call, so the call never receives an empty list.
local i_psc_vars "black hisp i_bmmaed i_bmfaed i_parinc i_intact i_sibsz"
local i_psc_vars "`i_psc_vars' i_farm i_south i_catholic i_jewish i_abil i_hsprog"
local i_psc_vars "`i_psc_vars' i_parencq i_frndplq i_mom18 i_mom22 good"
hte children41 coll23comp `i_psc_vars' [pweight=weight], ///
	alpha(0.001) ///
	join(6 7) ///
	noisily ///
	estcom(poisson) estopts(irr) ///
	controls(6: propsc) ///
	nograph

* Drop everything from memory before the next specification
clear all


********
**Now adapt code to include expectation of kids
********


****Attendance with expectkid79 as both propensity score in R and control in Stata
* Treatment: coll19enr.  Outcome: children41.
* Here expectkid79 appears twice: in the propensity-score covariate list and
* in the controls() option of hte.

use "/Users/Kristin/Dropbox/gov2001 replication/from Brand/expect19fit.dta", clear

* Covariate list (attendance covariates plus expectkid79), assembled in pieces
local i_psc_vars "black hisp i_bmmaed i_bmfaed maedfaed i_parinc i_intact"
local i_psc_vars "`i_psc_vars' i_sibsz USborn i_farm i_south i_catholic i_jewish"
local i_psc_vars "`i_psc_vars' i_abil i_hsprog i_parencq i_frndplq i_mom18 good"
local i_psc_vars "`i_psc_vars' expectkid79"

* Step 1: propensity score (stored in propsc) and block identifier (stored in block)
pscore coll19enr `i_psc_vars' [pweight=weight], ///
	pscore(propsc) ///
	blockid(block) ///
	numblo(7)
* Distribution of the estimated score within each block
by block, sort: summarize propsc

* Step 2: hte.  The covariate list is defined again immediately before the
* call, so the call never receives an empty list.
local i_psc_vars "black hisp i_bmmaed i_bmfaed maedfaed i_parinc i_intact"
local i_psc_vars "`i_psc_vars' i_sibsz USborn i_farm i_south i_catholic i_jewish"
local i_psc_vars "`i_psc_vars' i_abil i_hsprog i_parencq i_frndplq i_mom18 good"
local i_psc_vars "`i_psc_vars' expectkid79"
hte children41 coll19enr `i_psc_vars' [pweight=weight], ///
	alpha(0.001) ///
	join(6 7) ///
	noisily ///
	estcom(poisson) estopts(irr) ///
	controls(expectkid79 1: hisp 6: propsc) ///
	nograph

* Drop everything from memory before the figures section
clear all


****************
**FINAL FIGURES
****************



***FIGURE 1 - ACTUAL FERTILITY BY EXPECTED FERTILITY
***********
* Plots the mean of children41 against expectkid79 (coarsened to 0-5),
* separately for coll19enr==0 and coll19enr==1, with lines drawn at the
* mean minus and plus 2 standard errors.  The graph is saved as
* Figure1_FINAL.gph.

cd "/Users/traceyshollenberger/Dropbox/gov2001 replication/Data Extracts/Version 4/BrandDavis_v4-20120315193407380"
set more off
use "/Users/traceyshollenberger/Dropbox/gov2001 replication/from Brand/analysis19block.dta", clear
merge 1:1 id using "Fertility_Plans.dta", gen(merge19)
* Authors' note: 2,013 matched - the rest were unmatched records from the using file
keep if merge19==3

* Gap between observed and expected number of children.  This is computed
* before expectkid79 is coarsened, so it uses the uncoarsened values.
gen diffkid = children41 - expectkid79
summ diffkid

* Look at the expectkid79 distribution by college enrollment
*hist expectkid79 if coll19enr==0 [fw=weight]
*hist expectkid79 if coll19enr==1 [fw=weight]
tab expectkid79 coll19enr [fw=weight], col
tab expectkid79 coll19enr, col
*graph bar (sum) weight, over(expectkid79) over(coll19enr) percentage

* Coarsen expectkid79: values 6 through 12 are set to missing; the original
* variable is kept as oldexpectkid79.
tab expectkid79
rename expectkid79 oldexpectkid79
recode oldexpectkid79 (6/12=.), gen(expectkid79)

* Cell identifier, used only to tabulate cell sizes:
*   1-6  = coll19enr==0 with expectkid79 = 0..5
*   7-12 = coll19enr==1 with expectkid79 = 0..5
gen group2 = .
forvalues c = 0/1 {
	forvalues k = 0/5 {
		replace group2 = 6*`c' + `k' + 1 if coll19enr == `c' & expectkid79 == `k'
	}
}
tab group2
*summ oldexpectkid79 if group==6
*summ oldexpectkid79 if group==12

* Cell means and standard errors of the mean.
* FIX: the collapse is now unweighted.  It previously used a frequency
* weight equal to the number of rows in the observation's own cell.  A
* frequency weight tells Stata that each row stands for that many
* observations, so a cell with n rows was treated as n*n observations.  The
* weight was constant within each cell, so the means were unaffected, but
* the standard errors (and the plotted bands) were too narrow by a factor
* of roughly sqrt(n).
collapse (mean) desirekid79 diffkid children41 ///
	(semean) sedesire=desirekid79 sediffkid=diffkid sechildren41=children41, ///
	by(coll19enr expectkid79)

* Band limits: mean minus / plus 2 standard errors
gen ci1children41= children41 - (2*sechildren41)
gen ci2children41 = children41 + (2*sechildren41)
graph twoway (scatter children41 expectkid79 if coll19enr==0) ///
	(scatter children41 expectkid79 if coll19enr==1) ///
	(line ci1children41 expectkid79 if coll19enr==0) ///
	(line ci1children41 expectkid79 if coll19enr==1) ///
	(line ci2children41 expectkid79 if coll19enr==0) ///
	(line ci2children41 expectkid79 if coll19enr==1), ///
	title("Figure 1. Actual Fertility by Expected Fertility") ///
		ytitle("Number of children observed (by age 41)") ///
		xtitle("Number of children expected (reported at baseline)") ///
		yscale(range(1 3)) ylabel(1(.2)3) ///
		xscale(range(0 5)) xlabel(0(1)5) ///
		legend(ring(0) pos(5) col(1) order (1 "Non-college women" 2 "College women"))
graph save "/Users/traceyshollenberger/Dropbox/gov2001 replication/Paper Drafts/Figure1_FINAL.gph", replace




***FIGURE 2 - SIZE OF OVERESTIMATE (diffkid) BY PROPENSITY SCORE
***********
* Plots the mean of (expectkid79 - children41) by stratum, separately for
* coll19enr==0 and coll19enr==1, with lines drawn at the mean minus and plus
* 2 standard errors.  The graph is saved as Figure2_FINAL.gph.

cd "/Users/traceyshollenberger/Dropbox/gov2001 replication/Data Extracts/Version 4/BrandDavis_v4-20120315193407380"
set more off
use "/Users/traceyshollenberger/Dropbox/gov2001 replication/from Brand/analysis19block.dta", clear
merge 1:1 id using "Fertility_Plans.dta", gen(merge19)
* Authors' note: 2,013 matched - the rest were unmatched records from the using file
keep if merge19==3

* Observed minus expected number of children (negative = overestimate)
gen diffkid = children41 - expectkid79

* Calculate propensity score (covariates include expectkid79)
local i_psc_vars "black hisp i_bmmaed i_bmfaed maedfaed i_parinc i_intact i_sibsz USborn i_farm i_south i_catholic i_jewish i_abil i_hsprog i_parencq i_frndplq i_mom18 good expectkid79"
pscore coll19enr `i_psc_vars' [pweight=weight], pscore(propsc) blockid(block) detail numblo(7)

* NOTE(review): the pscore call above writes its block identifier to the
* variable block, but everything below groups on stratum, which is not
* created in this section and is presumably already in analysis19block.dta.
* Confirm that the pre-existing stratum, rather than the freshly estimated
* block, is the intended x-axis variable.

* Cell identifier, used only to tabulate cell sizes:
*   1-6  = coll19enr==0 with stratum = 1..6
*   7-12 = coll19enr==1 with stratum = 1..6
* Rows with any other stratum value are left missing here but are still
* included, as their own cells, in the collapse below.
gen group = .
forvalues c = 0/1 {
	forvalues s = 1/6 {
		replace group = 6*`c' + `s' if stratum == `s' & coll19enr == `c'
	}
}
tab group

* Cell means and standard errors of the mean.
* FIX: the collapse is now unweighted.  It previously used a frequency
* weight equal to the number of rows in the observation's own cell.  A
* frequency weight tells Stata that each row stands for that many
* observations, so a cell with n rows was treated as n*n observations.  The
* weight was constant within each cell, so the means were unaffected, but
* the standard errors (and the plotted bands) were too narrow by a factor
* of roughly sqrt(n).
collapse (mean) desirekid79 expectkid79 diffkid children41 ///
	(semean) sedesire=desirekid79 seexpect=expectkid79 sediffkid=diffkid sechildren41=children41, ///
	by(stratum coll19enr)

* Band limits: mean minus / plus 2 standard errors
gen ci1diffkid= diffkid - (2*sediffkid)
gen ci2diffkid= diffkid + (2*sediffkid)

gen ci1children41= children41 - (2*sechildren41)
gen ci2children41 = children41 + (2*sechildren41)

* Flip the sign so that a positive value means expected exceeds observed.
* Negating the limits swaps their roles: ci1overest is the upper limit and
* ci2overest the lower limit of the overestimate.
gen overest= (diffkid * (-1))
gen ci1overest= (ci1diffkid * (-1))
gen ci2overest= (ci2diffkid * (-1))
graph twoway (scatter overest stratum if coll19enr==0)(scatter overest stratum if coll19enr==1)  ///
		(line ci1overest stratum if coll19enr==0)(line ci1overest stratum if coll19enr==1) ///
		(line ci2overest stratum if coll19enr==0)(line ci2overest stratum if coll19enr==1), ///
		title("Figure 2. Fertility Overestimate by Propensity to Attend College") ///
		ytitle("Gap between expected and actual number of kids") ///
		xtitle("Propensity score stratum") ///
		yscale(range(0 1.4)) ylabel(0(.2)1.4) ///
		legend(ring(0) pos(5) col(1) order (1 "Non-college women" 2 "College women") )
graph save "/Users/traceyshollenberger/Dropbox/gov2001 replication/Paper Drafts/Figure2_FINAL.gph", replace

